package com.outsider.util.pageprocessor;



import java.text.SimpleDateFormat;
import java.util.Date;

import org.eclipse.jetty.util.UrlEncoded;

import com.outsider.util.Const;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} for a JD search-result page.
 *
 * <p>When the page contains the {@code J_goodslist} element, the processor
 * extracts a product name, price, image URL and detail-page URL and stores
 * them on the page under the keys {@code name}, {@code price}, {@code image}
 * and {@code url}. Each value is {@code null} when its selector matched
 * nothing.
 */
public class JD_SearchPageprocessor implements PageProcessor{
	private final Site site = Site.me().setUserAgent(Const.USER_AGENT).setSleepTime(1).setRetryTimes(3);

	/**
	 * Extracts the product fields from a search-result page.
	 *
	 * <p>Pages without the {@code J_goodslist} element are left untouched
	 * (no fields are stored and nothing is logged).
	 *
	 * @param page the downloaded page to extract from
	 */
	@Override
	public void process(Page page) {
		// No goods list on the page: nothing to extract.
		if (page.getHtml().xpath("//*[@id=\"J_goodslist\"]").toString()==null) return;

		String name=page.getHtml().xpath("//*[@class=\"p-name\"]/a/em/font/text()").toString();
		// [^"]* keeps the capture inside the src attribute; a greedy .* would run on
		// to the last quote of the <img> element and swallow the attributes after it.
		String image=page.getHtml().xpath("//*[@class=\"p-img\"]/a/img").regex("src=\"([^\"]*)\"").toString();
		String price=page.getHtml().xpath("//*[@class=\"p-price\"]/strong/i/text()").toString();
		// Same reasoning as above: the capture must not cross the closing quote of href.
		String url=page.getHtml().xpath("//*[@class=\"p-name\"]/a").regex("href=\"([^\"]*html)\"").toString();

		page.putField("name", name);
		page.putField("price", price);
		page.putField("image", toAbsoluteImageUrl(image));
		page.putField("url", url);
		System.out.println("JD:"+new SimpleDateFormat(Const.DATE_FORMAT).format(new Date()));
	}

	/**
	 * Turns the raw {@code src} attribute value into an absolute http(s) URL.
	 *
	 * @param src the extracted attribute value, possibly {@code null}
	 * @return {@code null} when {@code src} is {@code null} or empty (instead of
	 *         the string {@code "http://null"}); {@code src} unchanged when it
	 *         already starts with {@code http://} or {@code https://};
	 *         {@code "http:" + src} when it is protocol-relative ({@code //host/...});
	 *         otherwise {@code "http://" + src}
	 */
	private static String toAbsoluteImageUrl(String src) {
		if (src == null || src.isEmpty()) {
			return null;
		}
		if (src.startsWith("http://") || src.startsWith("https://")) {
			return src;
		}
		if (src.startsWith("//")) {
			return "http:" + src;
		}
		return "http://" + src;
	}

	/**
	 * @return the crawl settings (user agent, sleep time, retry count) used by this processor
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Manual smoke test: runs the processor once against a JD search for a fixed keyword.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		Spider.create(new JD_SearchPageprocessor()).test("http://search.jd.com/Search?keyword="+UrlEncoded.encodeString("MF840CH")+"&enc=utf-8");
	}

}
